Setup

Chunk Options
# Global knitr defaults applied to every chunk of this report.
knitr::opts_chunk$set(
  # Show the code and keep chunk output in the rendered document
  include = TRUE,
  echo = TRUE,
  # Keep messages and warnings out of the rendered document
  warning = FALSE,
  message = FALSE,
  # Figure geometry: height is 0.6 x width, centered, scaled to 90% of the page
  fig.align = "center",
  fig.asp = 0.6,
  fig.width = 12,
  out.width = "90%"
)
Installations
library(tidyverse)
library(dplyr)
library(arsenal)
library(HH)
library(leaps)
library(corrplot)
library(faraway)
library(ggpubr)
library(broom)
library(ggplot2)
library(MASS)
library(patchwork)
Load Data Set
# Read the raw CSV (col_types: three factor columns, then six doubles),
# standardise the column names, and keep only rows without missing values.
raw_hate_crime <- read_csv("data/HateCrimes.csv", col_types = "fffdddddd")
hate_crime <- drop_na(janitor::clean_names(raw_hate_crime))

Data Exploration

Descriptive Summary of Data

Descriptive Statistics: Hate Crime Rate After 2016 Election
Overall (N=45)
Level of unemployment
- high 23 (51.1%)
- low 22 (48.9%)
Level of state urbanization
- low 21 (46.7%)
- high 24 (53.3%)
Median Household Income
- Mean (SD) 55299.49 (8979.49)
- Median (Q1, Q3) 54916.00 (48060.00, 60708.00)
- Min - Max 39552.00 - 76165.00
Percent of adults with a high school degree
- Mean (SD) 0.87 (0.03)
- Median (Q1, Q3) 0.87 (0.84, 0.89)
- Min - Max 0.80 - 0.92
Percent of population that are not US citizens
- Mean (SD) 0.06 (0.03)
- Median (Q1, Q3) 0.05 (0.03, 0.08)
- Min - Max 0.01 - 0.13
Income inequality index
- Mean (SD) 0.46 (0.02)
- Median (Q1, Q3) 0.46 (0.44, 0.47)
- Min - Max 0.42 - 0.53
Percent of population that are non-white
- Mean (SD) 0.32 (0.15)
- Median (Q1, Q3) 0.30 (0.21, 0.42)
- Min - Max 0.06 - 0.63
Hate crime rate per 100,000 population
- Mean (SD) 0.30 (0.25)
- Median (Q1, Q3) 0.23 (0.14, 0.35)
- Min - Max 0.07 - 1.52

Outcome Distribution

Histogram of Outcome Variable

From the histogram below, we observe our outcome distribution has right skewness, suggesting that we may need to check our normality assumption. Our QQ Plot also indicates severe departures from normality.

# Histogram of the untransformed outcome (hate crime rate per 100k population)
outcome_hist_labels <- labs(
  title = "Distribution of Hate Crime Rates in 50 US States",
  x = "Hate Crime Rate per 100,000 Population",
  y = "Frequency of Distribution",
  caption = "Distribution of Hate Crime Rates ( 50 US States)"
)

ggplot(hate_crime, aes(x = hate_crimes_per_100k_splc)) +
  geom_histogram(fill = "black", color = "red") +
  outcome_hist_labels

QQ Plot of Outcome Variable
# Normal QQ plot of the untransformed outcome (base graphics).
# The outcome is pulled out as a plain vector; the Shapiro-Wilk calls further
# down reuse this vector.
hate_crimes_per_100k_splc <- hate_crime[["hate_crimes_per_100k_splc"]]
qqnorm(hate_crimes_per_100k_splc, pch = 19, cex = 1.5, col = 2)
# qqline() is called for its side effect of adding the dashed reference line;
# NOTE(review): the value stored in qq_plot looks unused - confirm and drop.
qq_plot <- qqline(hate_crimes_per_100k_splc, lty = 2, lwd = 2, col = 1)

Shapiro-Wilk Test of Outcome Variable

After performing a Shapiro-Wilk test to check the normality assumption of our outcome distribution, we find evidence to suggest that our data deviates from normality.

# Shapiro-Wilk normality test on the raw outcome, tidied into a one-row table
raw_outcome_shapiro <- shapiro.test(hate_crimes_per_100k_splc)
knitr::kable(broom::tidy(raw_outcome_shapiro), "simple")
statistic p.value method
0.7107896 0 Shapiro-Wilk normality test
Comparison of Basic Transformations

We apply a square root transformation and a natural log transformation to our outcome distribution, and compare the results of the data.

# Candidate outcome transformations: for the square-root and natural-log
# versions of the hate crime rate, build a histogram and a normal QQ plot.
# The four plot objects are combined into a single figure further down.

# Histogram of the square-root-transformed outcome
sqrt_transformation <- hate_crime %>%
  ggplot(aes(x = sqrt(hate_crimes_per_100k_splc))) +
  geom_histogram(color = "red", fill = "black") +
  labs(
    title = "Distribution of sqrt(Hate Crime Rates) in 50 US States",
    x = "sqrt(Hate Crime Rate per 100,000 Population)",
    y = "Frequency of Distribution",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# QQ plot of the square-root-transformed outcome.
# stat_qq() puts theoretical quantiles on x and sample quantiles on y, so the
# axes are labelled accordingly (they previously reused the histogram labels).
sqrt_qqplot <- hate_crime %>%
  ggplot(aes(sample = sqrt(hate_crimes_per_100k_splc))) +
  stat_qq() +
  stat_qq_line() +
  labs(
    title = "QQ Plot of sqrt(Hate Crime Rates) in 50 US States",
    x = "Theoretical Normal Quantiles",
    y = "Sample Quantiles: sqrt(Hate Crime Rate per 100,000 Population)",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# Histogram of the natural-log-transformed outcome
ln_transformation <- hate_crime %>%
  ggplot(aes(x = log(hate_crimes_per_100k_splc))) +
  geom_histogram(color = "red", fill = "black") +
  labs(
    title = "Distribution of ln(Hate Crime Rates) in 50 US States",
    x = "ln(Hate Crime Rate per 100,000 Population)",
    y = "Frequency of Distribution",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# QQ plot of the natural-log-transformed outcome (same axis convention as above)
ln_qqplot <- hate_crime %>%
  ggplot(aes(sample = log(hate_crimes_per_100k_splc))) +
  stat_qq() +
  stat_qq_line() +
  labs(
    title = "QQ Plot of ln(Hate Crime Rates) in 50 US States",
    x = "Theoretical Normal Quantiles",
    y = "Sample Quantiles: ln(Hate Crime Rate per 100,000 Population)",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )
Sqrt vs. Ln Transformations

After visual inspection, we observe that our natural log transformation may be a good candidate to re-test our normality assumptions.

# Patchwork layout: histograms on the top row, QQ plots on the bottom row
transformation_hist_row <- sqrt_transformation + ln_transformation
transformation_qq_row <- sqrt_qqplot + ln_qqplot
transformation_hist_row / transformation_qq_row

Shapiro-Wilk Test on our Natural Log Transformation

From the results of our test, we fail to reject the null hypothesis of normality at the 5% significance level (p-value = 0.745 > 0.05). There is therefore no evidence that our natural log transformed outcome deviates from normality, so we assume normality henceforth.

# Shapiro-Wilk normality test on the natural log of the outcome
log_outcome_shapiro <- shapiro.test(log(hate_crimes_per_100k_splc))
knitr::kable(
  broom::tidy(log_outcome_shapiro),
  "simple",
  caption = "Shapiro Wilk Test"
)
Shapiro Wilk Test
statistic p.value method
0.9830847 0.7452961 Shapiro-Wilk normality test
Adding Natural Log Transformation
# Append the natural log of the outcome as a new column
hate_crime <- mutate(
  hate_crime,
  ln_hate_crimes_per_100k_splc = log(hate_crimes_per_100k_splc)
)

Identifying Outliers

# Column chart of the log hate crime rate, one bar per state, used to eyeball
# states with unusually large or small values.
# Changes: dropped `colors = state` (not a ggplot2 aesthetic, so it had no
# effect) and relabelled the y axis, which shows states rather than frequencies.
hate_crime %>%
  ggplot(aes(x = ln_hate_crimes_per_100k_splc, y = state)) +
  geom_col(color = "blue") +
  labs(
    title = "Outlier Analysis of 50 US States",
    x = "ln(Hate Crime Rate per 100,000 Population)",
    y = "State",
    caption = "Distribution of Hate Crime Rates (50 US States)"
  )

After plotting a column graph of the hate crime rates against their respective states, we can see that Wyoming, South Dakota, and North Dakota had no values, while the District of Columbia, Washington, Oregon, Minnesota, Massachusetts, and Maine showed relatively large columns.

After Plotting a scatter plot of the same values, it was evident that these states were outliers that influenced the data set.

# Scatter plot of the raw hate crime rate for each state (one point per state,
# coloured by state, legend hidden because the x axis already names the states).
# Changes: dropped `colors = state` (not a ggplot2 aesthetic) and replaced the
# reassignable `F` with `FALSE`.
hate_crime %>%
  ggplot(aes(x = state, y = hate_crimes_per_100k_splc)) +
  geom_point(aes(color = state)) +
  # NOTE(review): x is a factor, so this lm smoother is fitted per state on a
  # single point each and presumably draws no line - confirm it is needed.
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )

Associations b/w Predictors and Hate Crime Rate

We verify whether the association between income inequality (median household income in this case) and the hate crime rate holds true. We also explore the associations of all the other covariates mentioned above with the outcome, and draw conclusions about each predictor’s significance.

# Pairwise correlations of the numeric columns (factor columns removed first),
# printed as a table rounded to two decimals
numeric_columns <- dplyr::select(hate_crime, -state, -unemployment, -urbanization)
knitr::kable(cor(numeric_columns), digits = 2)
median_household_income perc_population_with_high_school_degree perc_non_citizen gini_index perc_non_white hate_crimes_per_100k_splc ln_hate_crimes_per_100k_splc
median_household_income 1.00 0.65 0.30 -0.13 0.04 0.34 0.31
perc_population_with_high_school_degree 0.65 1.00 -0.26 -0.54 -0.50 0.26 0.30
perc_non_citizen 0.30 -0.26 1.00 0.48 0.75 0.24 0.14
gini_index -0.13 -0.54 0.48 1.00 0.55 0.38 0.22
perc_non_white 0.04 -0.50 0.75 0.55 1.00 0.11 -0.01
hate_crimes_per_100k_splc 0.34 0.26 0.24 0.38 0.11 1.00 0.89
ln_hate_crimes_per_100k_splc 0.31 0.30 0.14 0.22 -0.01 0.89 1.00
# Correlation matrix of the numeric columns drawn as circles; only the upper
# triangle is shown and the diagonal is omitted. Factor variables are removed
# before computing the correlations.
correlation_matrix <- cor(
  dplyr::select(hate_crime, -state, -unemployment, -urbanization)
)
corrplot::corrplot(
  correlation_matrix,
  type = "upper",
  method = "circle",
  diag = FALSE
)

Correlation Plots between Predictors and Outcome
# Scatter plot of the raw hate crime rate against one numeric predictor, with a
# fitted regression line, its confidence band, and the Pearson correlation.
#
# predictor        Column name of the predictor in `data` (character string).
# predictor_label  Text shown on the x axis.
# data             Data frame holding the predictor and the outcome column
#                  `hate_crimes_per_100k_splc`; defaults to `hate_crime`.
# Returns the plot object produced by ggscatter().
scatter_vs_hate_crime_rate <- function(predictor, predictor_label,
                                       data = hate_crime) {
  ggscatter(
    data,
    x = predictor,
    y = "hate_crimes_per_100k_splc",
    add = "reg.line", conf.int = TRUE,
    cor.coef = TRUE, cor.method = "pearson",
    xlab = predictor_label,
    ylab = "Hate Crime Rate (per 100k pop.)"
  )
}

# One panel per numeric predictor. The short names a-e are kept because the
# layout expression below refers to them. `c` shadows base::c only as a
# variable: calls such as c(1, 2) still resolve to the function.
a <- scatter_vs_hate_crime_rate(
  "median_household_income",
  "Median Household Income"
)

b <- scatter_vs_hate_crime_rate(
  "perc_population_with_high_school_degree",
  "% of People 25+ with High School Degree"
)

c <- scatter_vs_hate_crime_rate(
  "perc_non_citizen",
  "% of People Non-US Citizens"
)

# The Gini index in this data set lies between 0.42 and 0.53, i.e. it is on a
# 0-1 scale, so the axis label no longer claims a 0-100 scale.
d <- scatter_vs_hate_crime_rate(
  "gini_index",
  "Income Inequality Index (0-1)"
)

e <- scatter_vs_hate_crime_rate(
  "perc_non_white",
  "% of People Non-White"
)

From our results, we observe that the predictors “gini_index” and “median_household_income” have the highest correlations with our outcome of interest.

# Patchwork layout: three predictor panels on the top row, two on the bottom
predictor_panel_top <- a + b + c
predictor_panel_bottom <- d + e
predictor_panel_top / predictor_panel_bottom

Multicollinearity

# Scatter-plot matrix of the numeric variables (the factor columns state,
# unemployment and urbanization are dropped first)
pairs_data <- dplyr::select(hate_crime, -state, -unemployment, -urbanization)
pairs(pairs_data)

Calculating VIF for all the predictors
# Multiple linear regression of the log outcome on every predictor except
# `state`, fitted only to obtain variance inflation factors.
mult_fit <- lm(
  formula = ln_hate_crimes_per_100k_splc ~
    unemployment +
    urbanization +
    median_household_income +
    perc_population_with_high_school_degree +
    perc_non_citizen +
    gini_index +
    perc_non_white,
  data = hate_crime
)

# NOTE(review): more than one attached package may export vif(); the one
# attached last is used - consider calling it with an explicit namespace.
knitr::kable(vif(mult_fit), "simple")
x
unemploymentlow 1.426492
urbanizationhigh 1.983246
median_household_income 3.108161
perc_population_with_high_school_degree 3.895361
perc_non_citizen 3.728286
gini_index 1.845436
perc_non_white 3.236419

All the predictors have a VIF below 5. This suggests that it would not be problematic to include them in the construction of the model. However, the correlation analysis shows that variables perc_non_white and perc_non_citizen have a moderate linear relationship with a correlation coefficient of 0.75.

Analysis of Predictor Interactions

Interaction between income equality and unemployment
# Interaction check: does the slope of the log hate crime rate on the Gini
# index differ between unemployment groups?
# `hate_crimedft` is not created in this part of the report.
# NOTE(review): the "ln(gini index)" axis label presumes gini_index is already
# log-transformed in hate_crimedft - confirm against where it is built.
# Changes: `se = F` -> `se = FALSE`; the title now names both variables shown;
# the duplicated colour mapping inside geom_smooth() is removed (colour is
# inherited from the plot-level aes).
ggplot(
  hate_crimedft,
  aes(
    x = gini_index,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(unemployment)
  )
) +
  geom_point(size = 2) +
  # One least-squares line per unemployment group, without confidence bands
  geom_smooth(
    method = "lm",
    se = FALSE,
    aes(group = factor(unemployment))
  ) +
  labs(
    title = "ln(Hate crime per 100k people) vs. income inequality by unemployment status",
    x = "ln(gini index)",
    y = "ln(hate crime per 100k people)"
  ) +
  # NOTE(review): labels are matched to factor levels by position; verify that
  # the first level of `unemployment` really is the "Low" group.
  scale_color_manual(
    name = "Unemployment",
    labels = c("Low", "High"),
    values = c("blue", "red")
  )

# Linear model with main effects and the gini x unemployment interaction
reg1t <-
  lm(ln_hate_crimes_per_100k_splc ~ gini_index * factor(unemployment),
     data = hate_crimedft)
summary(reg1t)
## 
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ gini_index * factor(unemployment), 
##     data = hate_crimedft)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.08066 -0.39519 -0.00407  0.30086  1.51569 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        2.0684     2.6960   0.767    0.447
## gini_index                         2.9069     2.3159   1.255    0.217
## factor(unemployment)1              0.5293     3.6641   0.144    0.886
## gini_index:factor(unemployment)1   0.8122     3.2178   0.252    0.802
## 
## Residual standard error: 0.6305 on 41 degrees of freedom
## Multiple R-squared:  0.1214, Adjusted R-squared:  0.05711 
## F-statistic: 1.888 on 3 and 41 DF,  p-value: 0.1466

There is no significant interaction at 5% significance level. The relationship between hate crime per 100k people and income equality does not vary by unemployment status.

Interaction between income equality and urbanization
# Interaction check: does the slope of the log hate crime rate on the Gini
# index differ between urbanization groups?
# `hate_crimedft` is not created in this part of the report.
# NOTE(review): the "ln(gini index)" axis label presumes gini_index is already
# log-transformed in hate_crimedft - confirm against where it is built.
# Changes: `se = F` -> `se = FALSE`; the title says "income inequality" (the
# Gini index measures inequality); the duplicated colour mapping inside
# geom_smooth() is removed (colour is inherited from the plot-level aes).
ggplot(
  hate_crimedft,
  aes(
    x = gini_index,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(urbanization)
  )
) +
  geom_point(size = 2) +
  # One least-squares line per urbanization group, without confidence bands
  geom_smooth(
    method = "lm",
    se = FALSE,
    aes(group = factor(urbanization))
  ) +
  labs(
    title = "ln(Hate crime per 100k people) vs. income inequality by urbanization status",
    x = "ln(gini index)",
    y = "ln(hate crime per 100k people)"
  ) +
  # NOTE(review): labels are matched to factor levels by position; verify that
  # the first level of `urbanization` really is the "Low" group.
  scale_color_manual(
    name = "Urbanization",
    labels = c("Low", "High"),
    values = c("blue", "red")
  )

# Linear model with main effects and the gini x urbanization interaction
reg2t <- lm(ln_hate_crimes_per_100k_splc ~ gini_index * factor(urbanization), data = hate_crimedft)
summary(reg2t)
## 
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ gini_index * factor(urbanization), 
##     data = hate_crimedft)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23999 -0.42661 -0.03661  0.42869  1.25787 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        -3.019      3.464  -0.872    0.388
## gini_index                         -1.263      2.970  -0.425    0.673
## factor(urbanization)1               4.834      4.113   1.175    0.247
## gini_index:factor(urbanization)1    4.081      3.579   1.140    0.261
## 
## Residual standard error: 0.6443 on 41 degrees of freedom
## Multiple R-squared:  0.08241,    Adjusted R-squared:  0.01527 
## F-statistic: 1.227 on 3 and 41 DF,  p-value: 0.312

There is no significant interaction at 5% significance level. The relationship between hate crime per 100k people and income equality does not vary by urbanization status.

Interaction between education level and unemployment
# Interaction check: does the slope of the log hate crime rate on education
# level (share with a high school degree) differ between unemployment groups?
# `hate_crimedft` is not created in this part of the report.
# NOTE(review): the "ln(...)" x-axis label presumes the education variable is
# already log-transformed in hate_crimedft - confirm against where it is built.
# Changes: the title was copy-pasted from the income plot and misspelled
# "unemployment" - it now names the education variable that is plotted;
# `se = F` -> `se = FALSE`; the duplicated colour mapping inside geom_smooth()
# is removed (colour is inherited from the plot-level aes).
ggplot(
  hate_crimedft,
  aes(
    x = perc_population_with_high_school_degree,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(unemployment)
  )
) +
  geom_point(size = 2) +
  # One least-squares line per unemployment group, without confidence bands
  geom_smooth(
    method = "lm",
    se = FALSE,
    aes(group = factor(unemployment))
  ) +
  labs(
    title = "Scatterplot of Hate crime per 100k people vs. education level by unemployment status",
    x = "ln(percentage of population with high school degree and higher)",
    y = "ln(hate crime per 100k people)"
  ) +
  # NOTE(review): labels are matched to factor levels by position; verify that
  # the first level of `unemployment` really is the "Low" group.
  scale_color_manual(
    name = "Unemployment",
    labels = c("Low", "High"),
    values = c("blue", "red")
  )

# Linear model with main effects and the education x unemployment interaction
reg11t <-
  lm(
    ln_hate_crimes_per_100k_splc ~ perc_population_with_high_school_degree * factor(unemployment),
    data = hate_crimedft
  )
summary(reg11t)
## 
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ perc_population_with_high_school_degree * 
##     factor(unemployment), data = hate_crimedft)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2361 -0.3816  0.1036  0.3528  1.7602 
## 
## Coefficients:
##                                                               Estimate
## (Intercept)                                                    -1.1389
## perc_population_with_high_school_degree                         0.9443
## factor(unemployment)1                                           0.9631
## perc_population_with_high_school_degree:factor(unemployment)1   4.8987
##                                                               Std. Error
## (Intercept)                                                       0.5088
## perc_population_with_high_school_degree                           2.6854
## factor(unemployment)1                                             0.8374
## perc_population_with_high_school_degree:factor(unemployment)1     3.8931
##                                                               t value Pr(>|t|)
## (Intercept)                                                    -2.238   0.0307
## perc_population_with_high_school_degree                         0.352   0.7269
## factor(unemployment)1                                           1.150   0.2567
## perc_population_with_high_school_degree:factor(unemployment)1   1.258   0.2154
##                                                                
## (Intercept)                                                   *
## perc_population_with_high_school_degree                        
## factor(unemployment)1                                          
## perc_population_with_high_school_degree:factor(unemployment)1  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.63 on 41 degrees of freedom
## Multiple R-squared:  0.1228, Adjusted R-squared:  0.05865 
## F-statistic: 1.914 on 3 and 41 DF,  p-value: 0.1424

There is no significant interaction at 5% significance level. The relationship between hate crime per 100k people and education level does not vary by unemployment status.

Interaction between education level and urbanization
# Interaction check: does the slope of the log hate crime rate on education
# level (share with a high school degree) differ between urbanization groups?
# `hate_crimedft` is not created in this part of the report.
# NOTE(review): the "ln(...)" x-axis label presumes the education variable is
# already log-transformed in hate_crimedft - confirm against where it is built.
# Changes: `se = F` -> `se = FALSE`; the duplicated colour mapping inside
# geom_smooth() is removed (colour is inherited from the plot-level aes).
ggplot(
  hate_crimedft,
  aes(
    x = perc_population_with_high_school_degree,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(urbanization)
  )
) +
  geom_point(size = 2) +
  # One least-squares line per urbanization group, without confidence bands
  geom_smooth(
    method = "lm",
    se = FALSE,
    aes(group = factor(urbanization))
  ) +
  labs(
    title = "Scatterplot of Hate crime per 100k people vs. education level by urbanization status",
    x = "ln(percentage of population with high school degree or higher)",
    y = "ln(hate crime per 100k people)"
  ) +
  # NOTE(review): labels are matched to factor levels by position; verify that
  # the first level of `urbanization` really is the "Low" group.
  scale_color_manual(
    name = "Urbanization",
    labels = c("Low", "High"),
    values = c("blue", "red")
  )

# Linear model with main effects and the education x urbanization interaction
reg22t <- lm(ln_hate_crimes_per_100k_splc ~ perc_population_with_high_school_degree * factor(urbanization), data = hate_crimedft)
summary(reg22t)
## 
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ perc_population_with_high_school_degree * 
##     factor(urbanization), data = hate_crimedft)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37000 -0.40173  0.02948  0.45744  1.62894 
## 
## Coefficients:
##                                                               Estimate
## (Intercept)                                                    -1.1362
## perc_population_with_high_school_degree                         2.0724
## factor(urbanization)1                                           1.1960
## perc_population_with_high_school_degree:factor(urbanization)1   4.2938
##                                                               Std. Error
## (Intercept)                                                       0.4708
## perc_population_with_high_school_degree                           2.2729
## factor(urbanization)1                                             0.7401
## perc_population_with_high_school_degree:factor(urbanization)1     3.4439
##                                                               t value Pr(>|t|)
## (Intercept)                                                    -2.413   0.0204
## perc_population_with_high_school_degree                         0.912   0.3672
## factor(urbanization)1                                           1.616   0.1138
## perc_population_with_high_school_degree:factor(urbanization)1   1.247   0.2196
##                                                                
## (Intercept)                                                   *
## perc_population_with_high_school_degree                        
## factor(urbanization)1                                          
## perc_population_with_high_school_degree:factor(urbanization)1  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6118 on 41 degrees of freedom
## Multiple R-squared:  0.1727, Adjusted R-squared:  0.1122 
## F-statistic: 2.853 on 3 and 41 DF,  p-value: 0.04888

There is no significant interaction at 5% significance level. The relationship between hate crime per 100k people and education level does not vary by urbanization status.

Model Selection

Fit model with all predictors

# Analysis data for model selection: drop row 9 and the `state` identifier.
# NOTE(review): row 9 is assumed to be the District of Columbia - confirm, or
# filter on the state name instead of a row position.
hate_crime_no_dc <- hate_crime[c(-9), ] %>%
  dplyr::select(-state)

# Full model for the log hate crime rate.
# `hate_crime` already carries ln_hate_crimes_per_100k_splc, which is the
# response itself. Left inside `.` it becomes a predictor with coefficient 1,
# the residual sum of squares collapses to 0, and the stepwise search is
# meaningless. It is therefore excluded from the right-hand side; the column
# stays in hate_crime_no_dc for any later use.
mult.fit <- lm(
  log(hate_crimes_per_100k_splc) ~ . - ln_hate_crimes_per_100k_splc,
  data = hate_crime_no_dc
)

# Stepwise selection by AIC, allowing terms to be both dropped and re-added
step(mult.fit, direction = "both")
## Start:  AIC=-3232.37
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization + 
##     median_household_income + perc_population_with_high_school_degree + 
##     perc_non_citizen + gini_index + perc_non_white + ln_hate_crimes_per_100k_splc
## 
##                                           Df Sum of Sq    RSS     AIC
## - perc_non_white                           1     0.000  0.000 -3365.4
## <none>                                                  0.000 -3232.4
## - perc_non_citizen                         1     0.000  0.000 -3232.2
## - perc_population_with_high_school_degree  1     0.000  0.000 -3222.0
## - urbanization                             1     0.000  0.000 -3220.0
## - unemployment                             1     0.000  0.000 -3209.8
## - median_household_income                  1     0.000  0.000 -3193.4
## - gini_index                               1     0.000  0.000 -3191.0
## - ln_hate_crimes_per_100k_splc             1    11.909 11.909   -41.5
## 
## Step:  AIC=-3365.39
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization + 
##     median_household_income + perc_population_with_high_school_degree + 
##     perc_non_citizen + gini_index + ln_hate_crimes_per_100k_splc
## 
##                                           Df Sum of Sq    RSS     AIC
## - median_household_income                  1     0.000  0.000 -3426.7
## <none>                                                  0.000 -3365.4
## + perc_non_white                           1     0.000  0.000 -3363.4
## - urbanization                             1     0.000  0.000 -3320.4
## - perc_non_citizen                         1     0.000  0.000 -3300.7
## - unemployment                             1     0.000  0.000 -3268.2
## - perc_population_with_high_school_degree  1     0.000  0.000 -3215.4
## - gini_index                               1     0.000  0.000 -3183.2
## - ln_hate_crimes_per_100k_splc             1    11.983 11.983   -43.2
## 
## Step:  AIC=-3426.7
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization + 
##     perc_population_with_high_school_degree + perc_non_citizen + 
##     gini_index + ln_hate_crimes_per_100k_splc
## 
##                                           Df Sum of Sq    RSS     AIC
## + perc_non_white                           1     0.000  0.000 -3428.5
## <none>                                                  0.000 -3426.7
## + median_household_income                  1     0.000  0.000 -3425.3
## - gini_index                               1     0.000  0.000 -3309.9
## - perc_population_with_high_school_degree  1     0.000  0.000 -3271.3
## - urbanization                             1     0.000  0.000 -3188.3
## - perc_non_citizen                         1     0.000  0.000 -3169.9
## - unemployment                             1     0.000  0.000 -3125.0
## - ln_hate_crimes_per_100k_splc             1    12.048 12.048   -45.0
## 
## Step:  AIC=-3428.51
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization + 
##     perc_population_with_high_school_degree + perc_non_citizen + 
##     gini_index + ln_hate_crimes_per_100k_splc + perc_non_white
## 
##                                           Df Sum of Sq    RSS     AIC
## <none>                                                  0.000 -3428.5
## + median_household_income                  1     0.000  0.000 -3427.6
## - perc_non_white                           1     0.000  0.000 -3426.7
## - gini_index                               1     0.000  0.000 -3309.1
## - perc_population_with_high_school_degree  1     0.000  0.000 -3271.6
## - urbanization                             1     0.000  0.000 -3186.3
## - perc_non_citizen                         1     0.000  0.000 -3168.2
## - unemployment                             1     0.000  0.000 -3123.6
## - ln_hate_crimes_per_100k_splc             1    11.957 11.957   -43.3
## 
## Call:
## lm(formula = log(hate_crimes_per_100k_splc) ~ unemployment + 
##     urbanization + perc_population_with_high_school_degree + 
##     perc_non_citizen + gini_index + ln_hate_crimes_per_100k_splc + 
##     perc_non_white, data = hate_crime_no_dc)
## 
## Coefficients:
##                             (Intercept)  
##                              -1.713e-17  
##                         unemploymentlow  
##                              -1.126e-19  
##                        urbanizationhigh  
##                              -3.782e-18  
## perc_population_with_high_school_degree  
##                               1.608e-17  
##                        perc_non_citizen  
##                               9.671e-17  
##                              gini_index  
##                               0.000e+00  
##            ln_hate_crimes_per_100k_splc  
##                               1.000e+00  
##                          perc_non_white  
##                              -3.726e-17

Based on the results of the stepwise procedure, we choose the model with 2 predictors: the percent of adults 25 and older with a high school degree and the gini index.

Fit MLR based on Stepwise Model Result
# Final model: log hate crime rate regressed on the share of adults with a
# high school degree and the Gini index, fitted on the data without row 9.
stepwise_log_fit <- lm(
  formula = log(hate_crimes_per_100k_splc) ~
    perc_population_with_high_school_degree + gini_index,
  data = hate_crime_no_dc
)

Model Diagnostics

# Check model assumptions: draw the lm diagnostic plots on one 2 x 2 panel
graphics::par(mfrow = rep(2, times = 2))
plot(stepwise_log_fit)